# -*- coding: utf-8 -*-

import re

import scrapy
from scrapy.loader import ItemLoader

from chinahr.items import JobInfoItem, ComInfoItem
from scrapyluke.processors import *


class HighpinSpider(scrapy.Spider):
    """Spider for job postings on highpin.cn.

    Starts at the first listing page, follows every job link found on a
    listing page to its detail page, and keeps requesting the next
    numbered listing page for as long as the current one contains the
    listing container element.
    """

    name = "highpin"
    allowed_domains = ["highpin.cn"]
    start_urls = ["http://www.highpin.cn/zhiwei/p_1.html"]

    def parse(self, response):
        """Handle one listing page.

        Yields a request (handled by ``parse_info``) for each job link on
        the page, followed by a request for the next listing page. Yields
        nothing when the listing container is absent, which ends the
        pagination.
        """
        # No listing container: treat the page as past the last one.
        if not response.xpath('//div[@class="clearfix bor-bottom add-bg"]'):
            return

        job_urls = response.xpath('//p[@class="jobname clearfix"]/a/@href').extract()
        for url in job_urls:
            # urljoin resolves relative hrefs against the response URL and
            # leaves absolute hrefs untouched.
            yield scrapy.Request(response.urljoin(url), callback=self.parse_info)

        # Derive the next page number from the current URL (".../p_<n>.html").
        match = re.search(r'/p_(\d+)\.html', response.url)
        if match is None:
            # E.g. after a redirect: stop paginating rather than crash.
            self.logger.warning('Cannot determine page number from %s', response.url)
            return
        next_page = int(match.group(1)) + 1
        yield scrapy.Request('http://www.highpin.cn/zhiwei/p_%d.html' % next_page, callback=self.parse)

    def parse_info(self, response):
        """Handle one job detail page.

        Yields two items: a ``JobInfoItem`` with the job fields, then a
        ``ComInfoItem`` with the company link and name.
        """
        job_loader = ItemLoader(item=JobInfoItem(), response=response)
        # The company loader must be backed by ComInfoItem, not JobInfoItem.
        # NOTE(review): assumes ComInfoItem declares 'url' and 'com_name' - confirm.
        com_loader = ItemLoader(item=ComInfoItem(), response=response)

        job_loader.add_value('url', response.url)
        job_loader.add_xpath('job_name', '//div[@class="view-title-box mt_0"]/div/h1/text()', TakeFirstL())
        job_loader.add_xpath('job_recruNums', '//div[@class="c-view-con"]/div[4]/ul[2]/li[3]/text()')  # number of openings
        job_loader.add_xpath('job_benefits', '//div[@class="view-title-box mt_0"]/div[2]/h1/div/span/text()')  # job benefits
        job_loader.add_xpath('job_desc_req', '//div[@class="c-view-module add-bor-b view-wid934 clearfix"]/div[3]/div[2]/p/text()')  # major/specialty requirements
        job_loader.add_xpath('job_company', '//div[@class="c-view-module add-bor-b view-wid680 clearfix"]/ul/li/a/text()', TakeNumL(1))
        job_loader.add_xpath('job_miniEdu', '//div[@class="add-div800 clearfix"]/ul/li[2]/text()', TakeFirstL())
        job_loader.add_xpath('job_experience', '//div[@class="add-div800 clearfix"]/ul/li[1]/span[1]/text()', TakeNumL(1))
        job_loader.add_xpath('job_reqLan', '//div[@class="c-view-module add-bor-b view-wid934 clearfix"]/div[4]/div[2]/p/text()')
        job_loader.add_xpath('job_reqAge', '//div[@class="c-view-module add-bor-b view-wid934 clearfix"]/div[1]/ul[2]/li[1]/text()')
        job_loader.add_xpath('job_salary', '//div[@class="c-view-con"]/div[5]/ul/li/span/a/text()')
        job_loader.add_xpath('job_location', '//div[@class="c-view-con"]/div[4]/ul/li[3]/a/text()')
        job_loader.add_xpath('job_update', '//div[@class="c-view-con"]/div[4]/ul/li[4]/span[2]/text()')
        job_loader.add_xpath('job_desc_resp', '//div[@class="c-view-con"]/div[6]/div/div[1]/div[2]/p/text()')  # job responsibilities
        # 'job_desc_req' is fed from a second XPath as well (see above).
        job_loader.add_xpath('job_desc_req', '//div[@class="c-view-con"]/div[6]/div/div[2]/div[2]/p/text()')  # requirements (qualifications)

        # Company link and name sit next to the span whose text is the
        # "company name" label.
        com_loader.add_xpath('url', u'//ul[@class="view-ul"]/li/span[text()="公司名称："]/../a/@href')
        com_loader.add_xpath('com_name', u'//ul[@class="view-ul"]/li/span[text()="公司名称："]/../a/text()')

        yield job_loader.load_item()
        yield com_loader.load_item()

